This vignette shows how to use Signac with Seurat. There are three parts: Seurat, Signac and then visualization. We use an example CITE-seq data set from 10X Genomics.

Seurat

Start with the standard pre-processing steps for a Seurat object.

library(Seurat)

Download data from 10X Genomics.

download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5", destfile = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")

Create a Seurat object, and then perform SCTransform normalization. Note:

You can use the legacy functions here (i.e., NormalizeData, ScaleData, etc.), use SCTransform or any other normalization method (including no normalization). We did not notice a significant difference in cell type annotations with different normalization methods. We think that it is best practice to use SCTransform, but it is not a necessary step. Signac will work fine without it.

# load dataset
E = Read10X_h5(filename = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
pbmc <- CreateSeuratObject(counts = E$`Gene Expression`, project = "pbmc")

# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")

# run sctransform
pbmc <- SCTransform(pbmc,vars.to.regress = "percent.mt", verbose = FALSE)

Perform dimensionality reduction by PCA and UMAP embedding. Note:

*Signac actually needs these functions since it uses the nearest neighbor graph generated by Seurat.

# These are now standard steps in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)

Signac

First, make sure you have the Signac package installed.

devtools::install_github("mathewchamberlain/Signac")
library(Signac)

Generate Signac labels for the Seurat object. Note:

Optionally, you can do parallel computing by setting num.cores > 1 in the Signac function. Run time is ~minutes for ~10,000 cells.

# Run Signac
data("training_HPCA")
labels <- Signac(pbmc, R = training_HPCA)
celltypes = Generate_lbls(labels, E = pbmc)

Visualizations

Now we can visualize the cell type classifications at many different levels: Immune and nonimmune

pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$Immune, col.name = "immmune")
pbmc <- Seurat::SetIdent(pbmc, value='immmune')
DimPlot(pbmc)

pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$L2, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates_novel)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

Add protein expression information

pbmc[["ADT"]] <- CreateAssayObject(counts = E$`Antibody Capture`[,colnames(E$`Antibody Capture`) %in% colnames(pbmc)])
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR")
pbmc <- ScaleData(pbmc, assay = "ADT")

Visualize protein information on the RNA clustering result

FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"), ncol = 2)
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay instead

RidgePlot(pbmc, features = c("CD8a-TotalSeqB"))
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay instead

Identify differentially expressed proteins between clusters

# Downsample the clusters to a maximum of 500 cells each (makes the heatmap easier to see for
# small clusters)
pbmc.small <- subset(pbmc, downsample = 500)

# Find protein markers for all clusters, and draw a heatmap
adt.markers <- FindAllMarkers(pbmc.small, assay = "ADT", only.pos = TRUE, verbose = F)
DoHeatmap(pbmc.small, features = unique(adt.markers$gene), assay = "ADT", angle = 90) + NoLegend()

DefaultAssay(pbmc) <- "ADT"

Cluster the data based on protein expression information.

pbmc <- FindVariableFeatures(pbmc, selection.method = "vst")
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc, verbose = F)
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large a percentage of total singular
## values, use a standard svd instead.
## Warning in irlba(A = t(x = object), nv = npcs, ...): did not converge--results might be invalid!; try increasing
## work or maxit
pbmc <- FindNeighbors(pbmc)
pbmc <- FindClusters(pbmc, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7865
## Number of edges: 260140
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8849
## Number of communities: 15
## Elapsed time: 0 seconds
pbmc <- RunUMAP(pbmc, dims = 1:10)

Visualize Signac cell types on protein clusters

lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"))
## Warning: Found the following features in more than one assay, excluding the default. We will not include these in
## the final dataframe: CD8A
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The following requested variables were
## not found: CD8A

Save results

saveRDS(pbmc, file = "citeseq_signac.rds")

Session information

## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /site/ne/app/x86_64/R/v3.5.0/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bzinb_1.0.4        Signac_2.0.7       pscl_1.5.5         MASS_7.3-51.1      forcats_0.5.0     
##  [6] stringr_1.4.0      dplyr_0.8.5        purrr_0.3.3        readr_1.3.1        tidyr_1.1.1       
## [11] tibble_3.0.3       tidyverse_1.3.0    sctransform_0.2.1  Seurat_3.2.0       ggplot2_3.3.2     
## [16] veni_1.0.3         Matrix.utils_0.9.8 Matrix_1.2-18      biomaRt_2.38.0    
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.16-9001  neuralnet_1.44.2      lme4_1.1-23           tidyselect_1.1.0     
##   [5] RSQLite_2.2.0         AnnotationDbi_1.44.0  htmlwidgets_1.5.1     grid_3.5.0           
##   [9] BiocParallel_1.16.6   Rtsne_0.15            munsell_0.5.0         codetools_0.2-16     
##  [13] ica_1.0-2             statmod_1.4.34        future_1.16.0         miniUI_0.1.1.1       
##  [17] withr_2.1.2           colorspace_1.4-0      GOSemSim_2.8.0        Biobase_2.42.0       
##  [21] knitr_1.28            rstudioapi_0.11       stats4_3.5.0          ROCR_1.0-7           
##  [25] tensor_1.5            DOSE_3.8.2            listenv_0.8.0         labeling_0.3         
##  [29] urltools_1.7.3        polyclip_1.10-0       bit64_0.9-7           farver_2.0.3         
##  [33] rhdf5_2.27.13         vctrs_0.3.2           generics_0.0.2        xfun_0.12            
##  [37] randomForest_4.6-14   R6_2.4.1              graphlayouts_0.6.0    rsvd_1.0.3           
##  [41] RJSONIO_1.3-1.4       bitops_1.0-6          spatstat.utils_1.17-0 fgsea_1.8.0          
##  [45] gridGraphics_0.5-0    assertthat_0.2.1      promises_1.1.0        scales_1.1.0         
##  [49] ggraph_2.0.3          enrichplot_1.2.0      gtable_0.3.0          npsurv_0.4-0         
##  [53] globals_0.12.5        goftest_1.2-2         tidygraph_1.2.0       rlang_0.4.8          
##  [57] aplpack_1.3.3         splines_3.5.0         lazyeval_0.2.2        broom_0.7.0          
##  [61] europepmc_0.4         checkmate_2.0.0       yaml_2.2.1            BiocManager_1.30.10  
##  [65] reshape2_1.4.4        abind_1.4-5           modelr_0.1.8          backports_1.1.6      
##  [69] httpuv_1.5.2          qvalue_2.14.1         tools_3.5.0           tcltk_3.5.0          
##  [73] ggplotify_0.0.5       ellipsis_0.3.0        gplots_3.0.3          RColorBrewer_1.1-2   
##  [77] BiocGenerics_0.28.0   ggridges_0.5.2        Rcpp_1.0.4.6          plyr_1.8.6           
##  [81] progress_1.2.2        RCurl_1.98-1.1        prettyunits_1.1.1     rpart_4.1-15         
##  [85] deldir_0.1-28         pbapply_1.4-2         viridis_0.5.1         cowplot_1.0.0        
##  [89] S4Vectors_0.20.1      zoo_1.8-4             grr_0.9.5             haven_2.3.1          
##  [93] ggrepel_0.8.2         cluster_2.1.0         fs_1.4.1              magrittr_1.5         
##  [97] RSpectra_0.16-0       data.table_1.13.0     DO.db_2.9             reprex_0.3.0         
## [101] lmtest_0.9-37         triebeard_0.3.0       RANN_2.6.1            reactome.db_1.66.0   
## [105] packrat_0.5.0         fitdistrplus_1.0-14   evaluate_0.14         hms_0.5.3            
## [109] patchwork_1.0.1       lsei_1.2-0            mime_0.9              xtable_1.8-4         
## [113] XML_3.99-0.3          readxl_1.3.1          IRanges_2.16.0        gridExtra_2.3        
## [117] compiler_3.5.0        KernSmooth_2.23-16    crayon_1.3.4          minqa_1.2.4          
## [121] htmltools_0.4.0       mgcv_1.8-31           later_1.0.0           lubridate_1.7.8      
## [125] ReactomePA_1.26.0     DBI_1.1.0             tweenr_1.0.1          dbplyr_1.4.4         
## [129] rappdirs_0.3.1        boot_1.3-24           cli_2.0.2             gdata_2.18.0         
## [133] parallel_3.5.0        igraph_1.2.5          pkgconfig_2.0.3       rvcheck_0.1.8        
## [137] plotly_4.9.2.1        xml2_1.3.1            rvest_0.3.5           digest_0.6.18        
## [141] RcppAnnoy_0.0.16      graph_1.60.0          spatstat.data_1.4-3   rmarkdown_2.1        
## [145] cellranger_1.1.0      leiden_0.3.3          fastmatch_1.1-0       uwot_0.1.8           
## [149] shiny_1.4.0.2         gtools_3.8.1          graphite_1.28.2       nloptr_1.2.2.1       
## [153] glasso_1.11           lifecycle_0.2.0       nlme_3.1-145          jsonlite_1.6.1       
## [157] Rhdf5lib_1.4.3        limma_3.38.3          viridisLite_0.3.0     fansi_0.4.1          
## [161] pillar_1.4.3          lattice_0.20-41       fastmap_1.0.1         httr_1.4.1           
## [165] survival_3.2-3        GO.db_3.7.0           glue_1.4.0            UpSetR_1.4.0         
## [169] spatstat_1.64-1       png_0.1-7             bit_1.1-15.2          class_7.3-16         
## [173] ggforce_0.3.2         stringi_1.4.6         blob_1.2.1            caTools_1.17.1.1     
## [177] memoise_1.1.0         e1071_1.7-3           irlba_2.3.3           future.apply_1.4.0   
## [181] ape_5.3

A work by Mathew Chamberlain

mathew.chamberlain@sanofi.com